Metadata

Close
Metadata
@comment{Conference paper from SIBGRAPI 2019. The DOI is stored once in the doi field; the single url field points to the urlib.net repository record identified by the ibi field. Non-standard fields such as affiliation, conference-location, conference-year, ibi, targetfile and urlaccessdate are kept as repository metadata.}
@InProceedings{BastosMeloSchw:2019:MuReRe,
               author = "Bastos, Igor Leonardo Oliveira and Melo, Victor Hugo Cunha de and 
                         Schwartz, William Robson",
          affiliation = "{Universidade Federal de Minas Gerais} and {Universidade Federal 
                         de Minas Gerais} and {Universidade Federal de Minas Gerais}",
                title = "Multi-Loss Recurrent Residual Networks for Gesture Detection and 
                         Recognition",
            booktitle = "Proceedings of the 32nd Conference on Graphics, Patterns and 
                         Images ({SIBGRAPI})",
                 year = "2019",
               editor = "Oliveira, Luciano Rebou{\c{c}}as de and Sarder, Pinaki and Lage, 
                         Marcos and Sadlo, Filip",
         organization = "Conference on Graphics, Patterns and Images, 32. (SIBGRAPI)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "Gesture detection, gesture recognition, recurrent models, 
                         multi-task.",
             abstract = "Communication through gestures plays a relevant role in human 
                         life, in which a non-verbal language is used to propagate 
                         information among individuals. To recognize gestures, computers 
                         need to represent and interpret human appearance and motion, 
                         involving hands, arms, face, head and/or body, in a mathematical 
                         sense. Despite the high applicability in different contexts, most 
                         gesture recognition approaches in literature are not designed to 
                         deal with unsegmented videos. That is, most approaches do not 
                         temporally detect when a gesture occurs, which prevents to explore 
                         correlations between detection and recognition tasks, besides 
                         their application on real-world scenarios. In this sense, we 
                         propose the Multi-Loss Recurrent Residual Network (MLRRN), a 
                         multi-task based approach that performs both the recognition and 
                         temporal detection of gestures at once. It employs a dual loss 
                         function which takes into account the class assignment of each 
                         frame of a video to a gesture class and also determines the frame 
                         interval associated to each gesture. Our model counts with a dual 
                         input, gathering information from appearance and human pose on 
                         frames, besides bidirectional recurrent layers and residual 
                         modules. According to experiments conducted on ChaLearn Montalbano 
                         and ChaLearn ConGD datasets, our approach achieves results 
                         comparable to state-of-the-art methods considering average 
                         temporal Jaccard metric.",
  conference-location = "Rio de Janeiro, RJ, Brazil",
      conference-year = "28-31 Oct. 2019",
                  doi = "10.1109/SIBGRAPI.2019.00031",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/3U2KT7L",
                  url = "http://urlib.net/ibi/8JMKD3MGPEW34M/3U2KT7L",
           targetfile = "camera_ready_mlrrn.pdf",
        urlaccessdate = "2024, Apr. 28"
}